R, RStudio ve RMarkdown ile Tekrarlanabilir Rapor1

Serdar Balcı, MD, Pathologist

2019-09-30

Tekrarlanabilir Analiz ve Rapor

Replication Crisis

https://en.wikipedia.org/wiki/Replication_crisis

Replication Crisis Excel Version

RStudio ile proje oluştur

R Notebook

R Notebook dokümanı oluşturma

R Notebook’tan html, pdf ve word oluşturma

RNotebook vs RMarkdown

https://youtu.be/zNzZ1PfUDNk

R Markdown

Hem kendi kodları hem de html kodları yazılabilir

https://rmarkdown.rstudio.com

What is R Markdown? from RStudio, Inc. on Vimeo.

R Markdown: The Definitive Guide

https://bookdown.org/yihui/rmarkdown/

R Markdown syntax

https://gist.github.com/MinhasKamal/7fdebb7c424d23149140

Remedy Package

Remedy

R Markdown paket ve şablonları

https://bookdown.org/yihui/rmarkdown/document-templates.html

Render Markdown via code

inside R

# Convert markdown_example.md to markdown_example.html with the markdown package.
markdown::markdownToHTML('markdown_example.md', 
'markdown_example.html')

command line

# Same conversion, run non-interactively from the shell: R -e evaluates the quoted expression.
R -e "markdown::markdownToHTML('markdown_example.md',
'markdown_example.html')"

pandoc Rstudio integration

command line

# Append the pandoc directory inside the RStudio app bundle (macOS path) to PATH,
# presumably so that rmarkdown::render() can locate pandoc outside RStudio.
export PATH=$PATH:/Applications/RStudio.app/Contents/MacOS/pandoc
# Render the Markdown file from the shell.
R -e "rmarkdown::render('markdown_example.md')"

RMarkdown chunk içinde R kodlarını çalıştırma

{r, results='asis'}
 # Convert iris to a tibble and hand it to details::details() with the summary
 # label 'tibble'; the chunk header sets results='asis', so the output is
 # written into the document unmodified.
 iris %>%
  tibble::as_tibble() %>%
  details::details(summary = 'tibble')

Metin arasında R kodlarını çalıştırma

Chunk Options

Global Options

{r global_options, include=FALSE}
# Document-wide knitr chunk defaults, grouped by purpose.
knitr::opts_chunk$set(
  # Figures: size and the directory prefix for generated image files.
  fig.width = 12,
  fig.height = 8,
  fig.path = 'Figs/',
  # Execution: run the code, but keep source, warnings, messages and errors
  # out of the rendered report.
  eval = TRUE,
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  error = FALSE,
  # Formatting of any code and output that does get shown.
  tidy = TRUE,
  comment = NA
)

Other Code Languages

R Markdown kod örneği

{r}
# Load the example `cancer` data set. The package is named explicitly because a
# bare data("cancer") only searches attached packages, and survival is attached
# in a later chunk of this document.
# NOTE(review): assumes the survival package's `cancer` data set is the one intended.
data("cancer", package = "survival")
cancer

# Export for SPSS: write.foreign() writes the data file plus an SPSS syntax
# file that reads it. The "data" directory must already exist.
foreign::write.foreign(
  df = cancer,
  datafile = "data/cancer.sav",
  codefile = "data/cancer.spo",
  package = "SPSS"
)

R Markdown Paket Çağırma 📦

{r}
suppressPackageStartupMessages(library("tidyverse"))
suppressPackageStartupMessages(library("survival"))

Sık kullandığım paketler 📦

{tidyverse} {tidylog}

{lubridate} {janitor}

{readxl} {foreign}

{summarytools} {ggstatsplot} {tangram} {finalfit} {psycho} {jmv}

{survival} {survminer}

{report} {kableExtra}

R Markdown Veri Yükleme SPSS

R Markdown Veri Yükleme Excel

Veri Düzenleme

{r, eval=FALSE, include=FALSE}
# Standardise the column names of mydata with janitor::clean_names().
mydata <- janitor::clean_names(mydata)
# Print the resulting names separated by a comma and a newline.
cat(names(mydata), sep = ",\n")
{r}
# Quick overview of the columns in mydata.
glimpse(mydata)
{r, eval=FALSE, include=FALSE}
# Build mydata$Cinsiyet from mydata$cinsiyet: "K" becomes "Kadin" and "E"
# becomes "Erkek"; the recoded values are stored as a factor.
mydata$Cinsiyet <- factor(
  recode(
    mydata$cinsiyet,
    "K" = "Kadin",
    "E" = "Erkek"
  )
)
{r recode TNM stage, eval=FALSE, include=FALSE}
# Split the combined pathological stage string (patolojik_evre) into its T, N
# and M parts. str_match() returns a matrix whose second column is the first
# capture group.
#
# The N and M letters are captured by the regex itself instead of being pasted
# on afterwards: paste0("N", NA) turned rows without a match into the bogus
# strings "NNA" / "MNA", whereas now such rows are simply NA.

# Everything before the last "N".
mydata$Tstage <- stringr::str_match(mydata$patolojik_evre, "(.+)N")[, 2]

# From "N" up to (not including) the last "M".
mydata$Nstage <- stringr::str_match(mydata$patolojik_evre, "(N.+)M")[, 2]

# From "M" to the end of the string.
mydata$Mstage <- stringr::str_match(mydata$patolojik_evre, "(M.+)")[, 2]
{r recode TNM2, eval=FALSE, include=FALSE}
# Collapse the detailed stage strings into their main categories.
# case_when() takes the first matching condition; rows that match nothing
# (grepl() returns FALSE for NA) fall through to the "x" category.
# Columns are referenced by bare name rather than `.$col`, which bypasses
# dplyr's data masking and would break on grouped data.
mydata <- mydata %>%
  mutate(
    T_stage = case_when(
      grepl(pattern = "T1", x = Tstage) ~ "T1",
      grepl(pattern = "T2", x = Tstage) ~ "T2",
      grepl(pattern = "T3", x = Tstage) ~ "T3",
      grepl(pattern = "T4", x = Tstage) ~ "T4",
      TRUE ~ "Tx"
    ),
    N_stage = case_when(
      grepl(pattern = "N0", x = Nstage) ~ "N0",
      grepl(pattern = "N1", x = Nstage) ~ "N1",
      grepl(pattern = "N2", x = Nstage) ~ "N2",
      grepl(pattern = "N3", x = Nstage) ~ "N3",
      TRUE ~ "Nx"
    ),
    M_stage = case_when(
      grepl(pattern = "M0", x = Mstage) ~ "M0",
      grepl(pattern = "M1", x = Mstage) ~ "M1",
      TRUE ~ "Mx"
    )
  )

{r, eval=FALSE, include=FALSE}
# Dichotomise t_pdl1 and i_pdl1 at the cut-offs 1 and 5.
# Rows that satisfy neither condition (missing values) get NA from case_when().
mydata <- mydata %>%
  mutate(
    TumorPDL1gr1 = case_when(
      t_pdl1 < 1 ~ "kucuk1",
      t_pdl1 >= 1 ~ "buyukesit1"
    ),
    TumorPDL1gr5 = case_when(
      t_pdl1 < 5 ~ "kucuk5",
      t_pdl1 >= 5 ~ "buyukesit5"
    ),
    inflPDL1gr1 = case_when(
      i_pdl1 < 1 ~ "kucuk1",
      i_pdl1 >= 1 ~ "buyukesit1"
    ),
    inflPDL1gr5 = case_when(
      i_pdl1 < 5 ~ "kucuk5",
      i_pdl1 >= 5 ~ "buyukesit5"
    )
  )
{r, eval=FALSE, include=FALSE}
# Convert Excel serial date numbers in olum_tarihi to dates. as.numeric() turns
# non-numeric entries (such as the "yok" marker tested in the next chunk) into NA.
mydata$sontarih <- janitor::excel_numeric_to_date(as.numeric(mydata$olum_tarihi))
{r, eval=FALSE, include=FALSE}
# Default every row to "Dead", then mark rows whose olum_tarihi is "yok" as "Alive".
# NOTE(review): rows where olum_tarihi is NA are not selected by the comparison
# and therefore stay "Dead" — confirm this is intended.
mydata$Outcome <- "Dead"
mydata$Outcome[mydata$olum_tarihi == "yok"] <- "Alive"

R Markdown Tanımlayıcı İstatistikler

{r}
# Build a data-frame summary of colon_s with summarytools and open it with view().
library(summarytools)
view(dfSummary(colon_s))

Table One

{r, results='asis'}
# Descriptive table built with arsenal::tableby(). The formula is one-sided,
# so no grouping variable is used.
# Helper for writing the formula (prints the column names joined by " + "):
# cat(names(mydata), sep = " + \n")
library(arsenal)
tab1 <- tableby(
  ~ Cinsiyet + Yas + TumorYerlesimi,
  data = mydata
)
summary(tab1)

The Grammar of Tables

tangram: The Grammar of Tables

A grammar of tables

Grammar of Tables?

Easily generate information-rich, publication-quality tables from R

Kategorik Veriler

{r ", names(mydataCategorical)[i], "}
mydataCategorical %>% 
  janitor::tabyl(", names(mydataCategorical)[i], ") %>%
  adorn_pct_formatting(rounding = 'half up', digits = 1) %>%
  knitr::kable()
{r crosstable", dependent_variable, "}
mydata %>%
    summary_factorlist(dependent = '", dependent_variable, "', 
                       explanatory = explanatory,
                       total_col = TRUE,
                       p = TRUE,
                       add_dependent_label = TRUE) -> table

knitr::kable(table, row.names = FALSE, align = c('l', 'l', 'r', 'r', 'r'))

Kategorik Veriler için Grafikler

{r ggstatplot ", dependent_variable , " vs ", mydataCategorical_variable, ", layout='l-page'}
mydata %>% 
    ggstatsplot::ggbarstats(data = ., main = ", mydataCategorical_variable, ", condition = ", dependent_variable, ")

Continuous Variables

{r ", names(mydataContinious)[i], "}
mydataContinious %>% 
jmv::descriptives(
    data = .,
    vars = ", names(mydataContinious)[i], ",
    hist = TRUE,
    dens = TRUE,
    box = TRUE,
    violin = TRUE,
    dot = TRUE,
    mode = TRUE,
    sd = TRUE,
    variance = TRUE,
    skew = TRUE,
    kurt = TRUE,
    quart = TRUE)

R Markdown Recode

recode

R Markdown örneği Çapraz Tablolar

library(finalfit)

{r crosstable", dependent_variable, ", message=FALSE, warning=FALSE}
mydata %>%
    summary_factorlist(dependent = '", dependent_variable, "', 
                       explanatory = explanatory,
                       column = TRUE,
                       total_col = TRUE,
                       p = TRUE,
                       add_dependent_label = TRUE,
                       na_include=FALSE
                       # catTest = catTestfisher
                       ) -> table

knitr::kable(table, row.names = FALSE, align = c('l', 'l', 'r', 'r', 'r'))

R Markdown örneği Sağkalım

Kaplan-Meier

{r Kaplan-Meier}
# Kaplan-Meier plot drawn with finalfit::surv_plot().
# `dependent` and `explanatory` are not defined in this chunk; they must be set
# earlier in the document.
mydata %>%
  finalfit::surv_plot(dependent,
                      explanatory,
                      xlab='Time (months)',
                      pval=TRUE,
                      legend = 'none',
                      break.time.by = 12,
                      xlim = c(0,60),
                      legend.labs = c('a','b')
)

Sağkalım Tabloları

{r}
# Fit survival curves with survfit() and print the fitted object.
# NOTE(review): `dependent ~ explanatory` reads like a template; as written,
# survfit() looks for objects or columns with exactly these names — confirm
# before running.
km_fit <- survfit(dependent ~ explanatory,
                  data = mydata)
km_fit
{r, eval=FALSE, include=FALSE}
# Survival response built from the follow-up time (OverallTime) and the
# numeric event indicator (Outcome2).
library(survival)
km <- Surv(time = mydata$OverallTime, event = mydata$Outcome2)
# Optional inspection:
# head(km,80)
# plot(km)
{r 1-3-5-yr}
# Survival estimates at 12, 36 and 60 time units (the chunk label suggests 1-, 3- and 5-year, i.e. time in months).
summary(km_fit, times = c(12,36,60))

Pairwise comparison

{r eval=FALSE, include=FALSE}
# Pairwise survival comparisons between the levels of ShellAnteriorOnly,
# with p-values adjusted by the "BH" method.
survminer::pairwise_survdiff(formula = Surv(time, Outcome) ~ ShellAnteriorOnly, 
                             data = mydata,
                             p.adjust.method = "BH")

Multivariate Analysis Survival

{r Multivariate Analysis, eval=FALSE, include=FALSE}
# Multivariate model table produced by finalfit().
library(finalfit)
library(survival)

# Reuse the variables chosen for the Kaplan-Meier analysis
# (explanatoryKM / dependentKM are defined elsewhere in the document).
dependentMultivariate <- dependentKM
explanatoryMultivariate <- explanatoryKM

tMultivariate <- mydata %>%
  finalfit(dependentMultivariate, explanatoryMultivariate)

# Render the table: the first two columns left-aligned, the rest right-aligned.
knitr::kable(
  tMultivariate,
  row.names = FALSE,
  align = c("l", "l", "r", "r", "r", "r")
)
{r define survival time, eval=FALSE, include=FALSE}
# Follow-up time: interval from CerrahiTarih to SonTarih, both parsed as
# year-month-day dates.
mydata$int <- lubridate::interval(
  start = lubridate::ymd(mydata$CerrahiTarih),
  end = lubridate::ymd(mydata$SonTarih)
)
# Interval length in months, rounded to one decimal place.
mydata$OverallTime <- round(
  lubridate::time_length(mydata$int, "month"),
  digits = 1
)
{r, eval=FALSE, include=FALSE}
# Take OverallTime directly from the existing genel_sagkalim column
# (presumably an alternative to computing it from dates — verify which applies).
mydata$OverallTime <- mydata$genel_sagkalim
{r, eval=FALSE, include=FALSE}
## Recoding mydata$Outcome into mydata$Outcome2
mydata$Outcome2 <- recode(mydata$Outcome,
               "Alive" = "0",
               "Dead" = "1")
mydata$Outcome2 <- as.numeric(mydata$Outcome2)

jamovi ve R entegrasyonu

Rj Editor – Analyse your data with R in jamovi

{jmv} paket kodları

jamovi syntax mode

Güncellemeler olunca kodlar çalışacak mı?

Paket Kütüphaneleri

  • packrat / renv

https://environments.rstudio.com

Docker

  • docker

The Rocker Project

Docker Containers for the R Environment

docker run --rm -ti rocker/r-base

Or get started with an RStudio® instance:

docker run -e PASSWORD=yourpassword --rm -p 8787:8787 rocker/rstudio

and point your browser to localhost:8787. Log in with user/password rstudio/yourpassword

Managing containers

Yeni R sürümleri

  • RSwitch

https://rud.is/rswitch/

  • Using RSwitch

https://rud.is/rswitch/guide/

: scale 30%

Yedeklemeyi nasıl yapacağız

Projeyi düzgün organize edin

  • pdf
  • R
  • images
  • bib
{r load library, include=FALSE}
# Run the script R/loadLibrary.R; here::here() builds the path to it.
source(file = here::here("R", "loadLibrary.R"))

Save Final Data

{r}
# saved data after analysis to `mydata.xlsx`.
# (The sentence above used to sit in the chunk as bare text, which R cannot
# parse; it is kept as a comment.)

# Entire workspace.
save.image(file = here::here("data", "mydata_work_space.RData"))

# The data frame alone, as .rds. The target path is passed positionally because
# readr renamed this argument from `path` to `file` in 1.4.0; positional use
# works with both old and new versions.
readr::write_rds(mydata, here::here("data", "mydata_afteranalysis.rds"))

saveRDS(object = mydata, file = here::here("data", "mydata.rds"))

# Excel copy; the path is built once and reused for the report line below.
xlsx_path <- here::here("data", "mydata.xlsx")
writexl::write_xlsx(mydata, xlsx_path)

# Report the file path together with its ctime as returned by file.info().
paste0(xlsx_path, " : ", file.info(xlsx_path)$ctime)

GitHub

{r github push}
# Stage, commit and push everything in the current working directory to
# origin/master, using a timestamped commit message.
CommitMessage <- paste0("updated on ", Sys.time())
wd <- getwd()

# shQuote() protects the directory and the message, so paths containing spaces
# or quotes no longer break the shell command. The commands stay
# newline-separated, so each one still runs regardless of the previous one.
gitCommand <- paste0(
  "cd ", shQuote(wd),
  " \n git add . \n git commit --message ", shQuote(CommitMessage),
  " \n git push origin master \n"
)

# intern = TRUE returns the command output as a character vector.
system(command = gitCommand, intern = TRUE)

GitHub Yedekleme

CommitMessage <- paste("updated on ", Sys.time(), sep = "")
wd <- getwd()
gitCommand <- paste("cd ", wd, " \n git add . \n git commit --message '", CommitMessage, 
    "' \n git push origin master \n", sep = "")

system(command = gitCommand, intern = TRUE)
[1] "[master 8066402] updated on 2019-09-29 18:08:41"      
[2] " 7 files changed, 421 insertions(+), 239 deletions(-)"
[3] " create mode 100644 images/RNotebook1.gif"            
[4] " create mode 100644 images/RNotebook2.gif"            
[5] " create mode 100644 images/RStudio-NewProject.gif"    

Her dokümanın sonuna kullandığınız kütüphaneler için atıf yazdırabilirsiniz

{r library citation, echo=TRUE}
# Print how to cite R itself.
citation()

Libraries Used

citation()

To cite R in publications use:

  R Core Team (2019). R: A language and environment for
  statistical computing. R Foundation for Statistical Computing,
  Vienna, Austria. URL https://www.R-project.org/.

A BibTeX entry for LaTeX users is

  @Manual{,
    title = {R: A Language and Environment for Statistical Computing},
    author = {{R Core Team}},
    organization = {R Foundation for Statistical Computing},
    address = {Vienna, Austria},
    year = {2019},
    url = {https://www.R-project.org/},
  }

We have invested a lot of time and effort in creating R, please
cite it when using it for data analysis. See also
'citation("pkgname")' for citing R packages.

Bu oturuma spesifik kullanılan paketler

report::cite_packages(session = sessionInfo())

data[order(data$References), ]

Ewen Harrison, Tom Drake and Riinu Ots (2019). finalfit: Quickly Create Elegant Regression Results Tables and Plots when Modelling. R package version 0.9.5. https://CRAN.R-project.org/package=finalfit
H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016.
Hadley Wickham (2017). tidyverse: Easily Install and Load the ‘Tidyverse’. R package version 1.2.1. https://CRAN.R-project.org/package=tidyverse
Hadley Wickham (2019). forcats: Tools for Working with Categorical Variables (Factors). R package version 0.4.0. https://CRAN.R-project.org/package=forcats
Hadley Wickham (2019). stringr: Simple, Consistent Wrappers for Common String Operations. R package version 1.4.0. https://CRAN.R-project.org/package=stringr
Hadley Wickham and Lionel Henry (2019). tidyr: Tidy Messy Data. R package version 1.0.0. https://CRAN.R-project.org/package=tidyr
Hadley Wickham, Jim Hester and Romain Francois (2018). readr: Read Rectangular Text Data. R package version 1.3.1. https://CRAN.R-project.org/package=readr
Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2019). dplyr: A Grammar of Data Manipulation. R package version 0.8.3. https://CRAN.R-project.org/package=dplyr
Kirill Müller and Hadley Wickham (2019). tibble: Simple Data Frames. R package version 2.1.3. https://CRAN.R-project.org/package=tibble
Lionel Henry and Hadley Wickham (2019). purrr: Functional Programming Tools. R package version 0.3.2. https://CRAN.R-project.org/package=purrr
Therneau T (2015). A Package for Survival Analysis in S. version2.38, <URL: https://CRAN.R-project.org/package=survival>.

Tek tek paket atıfları

{r library citations}
# Print the citation for the tidyverse package.
citation("tidyverse")
{r}
# Print the citation for each package. citation() errors when the named package
# is not installed, so the names must match the installed packages exactly
# (the last one is "ggstatsplot", previously misspelled "ggstatplot").
citation("readxl")
citation("janitor")
citation("report")
citation("finalfit")
citation("ggstatsplot")

Jamovi ve R için atıf örneği

The jamovi project (2019). jamovi. (Version 0.9) [Computer Software]. Retrieved from https://www.jamovi.org. R Core Team (2018). R: A Language and environment for statistical computing. [Computer software]. Retrieved from https://cran.r-project.org/. Fox, J., & Weisberg, S. (2018). car: Companion to Applied Regression. [R package]. Retrieved from https://cran.r-project.org/package=car.

Her dokümanın sonuna oturum detaylarınızı yazdırabilirsiniz

{r session info, echo=TRUE}
# Print the R version, platform and the attached/loaded package versions.
sessionInfo()

Session Info

sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.6

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
 [1] finalfit_0.9.5    survival_2.44-1.1 forcats_0.4.0    
 [4] stringr_1.4.0     dplyr_0.8.3       purrr_0.3.2      
 [7] readr_1.3.1       tidyr_1.0.0       tibble_2.1.3     
[10] ggplot2_3.2.1     tidyverse_1.2.1  

loaded via a namespace (and not attached):
 [1] nlme_3.1-141        bitops_1.0-6        matrixStats_0.55.0 
 [4] lubridate_1.7.4     RColorBrewer_1.1-2  httr_1.4.1         
 [7] tools_3.6.0         backports_1.1.4     R6_2.4.0           
[10] rpart_4.1-15        Hmisc_4.2-0         lazyeval_0.2.2     
[13] colorspace_1.4-1    jomo_2.6-9          nnet_7.3-12        
[16] withr_2.1.2         tidyselect_0.2.5    gridExtra_2.3      
[19] compiler_3.6.0      cli_1.1.0           rvest_0.3.4        
[22] formatR_1.7         htmlTable_1.13.2    mice_3.6.0         
[25] xml2_1.2.2          scales_1.0.0        checkmate_1.9.4    
[28] digest_0.6.21       foreign_0.8-72      minqa_1.2.4        
[31] rmarkdown_1.15      base64enc_0.1-3     pkgconfig_2.0.3    
[34] htmltools_0.3.6     lme4_1.1-21         htmlwidgets_1.3    
[37] rlang_0.4.0         readxl_1.3.1        rstudioapi_0.10    
[40] xaringan_0.12       pryr_0.1.4          generics_0.0.2     
[43] jsonlite_1.6        acepack_1.4.1       RCurl_1.95-4.12    
[46] magrittr_1.5        rapportools_1.0     Formula_1.2-3      
[49] Matrix_1.2-17       Rcpp_1.0.2          munsell_0.5.0      
[52] lifecycle_0.1.0     stringi_1.4.3       yaml_2.2.0         
[55] MASS_7.3-51.4       plyr_1.8.4          grid_3.6.0         
[58] promises_1.0.1      parallel_3.6.0      crayon_1.3.4       
[61] mitml_0.3-7         lattice_0.20-38     haven_2.1.1        
[64] splines_3.6.0       pander_0.6.3        summarytools_0.9.4 
[67] hms_0.5.1           magick_2.2          zeallot_0.1.0      
[70] knitr_1.25          pillar_1.4.2        tcltk_3.6.0        
[73] boot_1.3-23         revealjs_0.9        codetools_0.2-16   
[76] pan_1.6             servr_0.15          glue_1.3.1         
[79] evaluate_0.14       latticeExtra_0.6-28 data.table_1.12.2  
[82] renv_0.6.0-141      modelr_0.1.5        httpuv_1.5.2       
[85] vctrs_0.2.0         nloptr_1.2.1        cellranger_1.1.0   
[88] gtable_0.3.0        assertthat_0.2.1    xfun_0.9           
[91] mime_0.7            broom_0.5.2         later_0.8.0        
[94] cluster_2.1.0      

Sonraki Konular

  • RStudio ile GitHub kullanımı

Sunum Linkleri

https://sbalci.github.io/MyRCodesForDataAnalysis/R-Markdown.nb.html https://sbalci.github.io/MyRCodesForDataAnalysis/R-Markdown.html

https://forms.gle/UqGJBiAjB8uLPRon8

Geri Bildirim

İletişim

Completed on 2019-09-30.

Serdar Balci, MD, Pathologist

https://rpubs.com/sbalci/CV
https://sbalci.github.io/
https://github.com/sbalci
https://twitter.com/serdarbalci


  1. Bu bir derlemedir, mümkün mertebe alıntılara linklerle referans vermeye çalıştım.↩︎